import sys
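# Deep Theano graphs can exceed Python's default recursion limit during compilation/pickling.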
sys.setrecursionlimit(10000)
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
import os
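# Allow gnumpy to convert implicitly between numpy arrays and garrays (used by breze on GPU).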
os.environ['GNUMPY_IMPLICIT_CONVERSION'] = 'allow'
print os.environ.get('GNUMPY_IMPLICIT_CONVERSION')
import cPickle
import gzip
from breze.learn.data import one_hot
from breze.learn.base import cast_array_to_local_type
from breze.learn.utils import tile_raster_images
import climin.stops
import climin.initialize
from climin import mathadapt as ma
from breze.learn import hvi
from breze.learn.hvi import HmcViModel
from breze.learn.hvi.energies import (NormalGaussKinEnergyMixin, DiagGaussKinEnergyMixin)
from breze.learn.hvi.inversemodels import MlpGaussInvModelMixin
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
#import fasttsne
from IPython.html import widgets
%matplotlib inline
import theano
theano.config.compute_test_value = 'ignore'  # set to 'raise' when debugging test values
from theano import (tensor as T, clone)
datafile = '../mnist.pkl.gz'
# Load data.
with gzip.open(datafile, 'rb') as f:
train_set, val_set, test_set = cPickle.load(f)
X, Z = train_set
VX, VZ = val_set
TX, TZ = test_set
Z = one_hot(Z, 10)
VZ = one_hot(VZ, 10)
TZ = one_hot(TZ, 10)
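# one_hot turns integer class labels into rows of an (n_samples, 10) indicator
# matrix, e.g. label 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].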
X_no_bin = X
VX_no_bin = VX
TX_no_bin = TX
# binarize the MNIST data
np.random.seed(0)
X = np.random.binomial(1, X) * 1.0
VX = np.random.binomial(1, VX) * 1.0
TX = np.random.binomial(1, TX) * 1.0
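# Quick sanity check (a hedged diagnostic, not part of the original pipeline):
# Bernoulli-sampling each pixel preserves the grey-level intensities in
# expectation, so the per-pixel means should roughly agree.
print 'mean abs. deviation of pixel means:', abs(X.mean(axis=0) - X_no_bin.mean(axis=0)).mean()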
image_dims = 28, 28
X_np, Z_np, VX_np, VZ_np, TX_np, TZ_np, X_no_bin_np, VX_no_bin_np, TX_no_bin_np = X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin
X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin = [
    cast_array_to_local_type(i)
    for i in (X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin)]
print X.shape
fig, ax = plt.subplots(figsize=(9, 9))
img = tile_raster_images(X_np[:64], image_dims, (8, 8), (1, 1))
ax.imshow(img, cmap=cm.binary)
fast_dropout = False
if fast_dropout:
class MyHmcViModel(HmcViModel,
hvi.FastDropoutMlpBernoulliVisibleVAEMixin,
hvi.FastDropoutMlpGaussLatentVAEMixin,
DiagGaussKinEnergyMixin,
MlpGaussInvModelMixin):
pass
kwargs = {
'p_dropout_inpt': .1,
'p_dropout_hiddens': [.2, .2],
}
    print 'Using the fast dropout model.'
else:
class MyHmcViModel(HmcViModel,
hvi.MlpBernoulliVisibleVAEMixin,
hvi.MlpGaussLatentVAEMixin,
DiagGaussKinEnergyMixin,
MlpGaussInvModelMixin):
pass
kwargs = {}
batch_size = 500
#optimizer = 'rmsprop', {'step_rate': 1e-4, 'momentum': 0.95, 'decay': .95, 'offset': 1e-6}
#optimizer = 'adam', {'step_rate': .5, 'momentum': 0.9, 'decay': .95, 'offset': 1e-6}
optimizer = 'adam', {'step_rate': 0.0005}
# This is the number of random variables NOT the size of
# the sufficient statistics for the random variables.
n_latents = 2
n_hidden = 200
m = MyHmcViModel(X.shape[1], n_latents,
[n_hidden, n_hidden], ['softplus'] * 2,
[n_hidden, n_hidden], ['rectifier'] * 2,
[n_hidden, n_hidden], ['rectifier'] * 2,
n_hmc_steps=1, n_lf_steps=4,
n_z_samples=1,
optimizer=optimizer, batch_size=batch_size, allow_partial_velocity_update=False, perform_acceptance_step=False,
compute_transition_densities=True,
**kwargs)
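# climin's randomize_normal(array, loc, scale) draws every parameter i.i.d.
# from N(loc, scale**2), i.e. N(0.1, 0.01) here (assuming climin's usual signature).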
climin.initialize.randomize_normal(m.parameters.data, 0.1, 1e-1)
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.2)
#m.parameters.__setitem__(m.kin_energy.mlp.layers[-1].bias, 1)
old_best_params = None
#print m.score(TX)
print m.parameters.data.shape
FILENAME = 'hvi_gen2_recog2_aux2_late2_hid200_1hmc_04lf_np_R1.pkl'
#old_best_params = None
with open(FILENAME, 'rb') as f:
    np_array = cPickle.load(f)
old_best_params = cast_array_to_local_type(np_array)
print old_best_params.shape
m.parameters.data = old_best_params.copy()
#old_best_loss = m.score(VX)
print m.score(VX)
print m.score(TX)
print m.parameters.view(m.init_recog.mlp.layers[2].bias)
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.2)
#m.parameters.__setitem__(m.init_recog.mlp.layers[2].bias, cast_array_to_local_type(np.array([ 0.14096579, 0.53084856, -1.75648117, -2.5653615 ])))
#m.parameters.__setitem__(m.kin_energy.variance_parameter, cast_array_to_local_type(np.array([0.01, -0.01])))
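# The leapfrog step size is parameterised as eps = 0.1 * s**2 + 1e-8 with raw
# parameter s, which keeps eps strictly positive (inferred from this formula
# and from the training loop below):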
print 0.1 * m.parameters.view(m.hmc_sampler.step_size_param) ** 2 + 1e-8
print m.estimate_nll(TX, 200)
print m.score(VX_no_bin)
print m.score(TX_no_bin)
m._f_loss, m._f_dloss = m._make_loss_functions()
from theano.printing import debugprint
#debugprint(m._f_dloss.theano_func)
print m.parameters._var_to_shape
print
print m.parameters._var_to_slice
grad = m._f_dloss(m.parameters.data, X[:625])
print (abs(grad) > 2).sum()
print (abs(grad) > 10).sum()
x = (abs(grad) > 8).astype('int32')
matching_indices = [i for i, value in enumerate(x) if value == 1]
print matching_indices
grad_array = np.zeros((100, len(m.parameters.data)))
for i in range(100):
grad_array[i, :] = m._f_dloss(m.parameters.data, X[:625])
mean_grad = grad_array.mean(axis=0)
print abs(mean_grad).mean()
print mean_grad.min(), mean_grad.argmin()
print mean_grad.max(), mean_grad.argmax()
std_grad = grad_array.std(axis=0)
print std_grad.mean()
print std_grad.min(), std_grad.argmin()
print std_grad.max(), std_grad.argmax()
print mean_grad[std_grad.argmax()]
n, bins, patches = plt.hist(std_grad, 40)
print n.astype('int32')
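# A further hedged diagnostic: the per-parameter gradient signal-to-noise ratio
# (|mean| / std over the 100 re-evaluations). Parameters with SNR well below 1
# receive stochastic gradients dominated by sampling noise.
snr = abs(mean_grad) / (std_grad + 1e-12)
print 'median gradient SNR:', np.median(snr)
print 'fraction of parameters with SNR < 1:', (snr < 1.).mean()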
TARGET_FILENAME = 'hvi_gen2_recog2_aux2_late2_hid200_pretr_latetrans_3hmc_04lf'
FILETYPE_EXTENSION = '.pkl'
old_best_params = None
max_passes = 100
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size
stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)
# theano.config.optimizer = 'fast_compile'
for i, info in enumerate(m.powerfit((X_no_bin,), (VX,), stop, pause, eval_train_loss=False)):
    print i, m.score(X[:10000]).astype('float32'), info['val_loss'], \
        np.exp(m.parameters.view(m.kin_energy.variance_parameter).as_numpy_array()), \
        0.1 * m.parameters.view(m.hmc_sampler.step_size_param).as_numpy_array() ** 2 + 1e-8
if i == 0 and old_best_params is not None:
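        # NOTE: old_best_loss must also be defined (see the commented-out
        # `old_best_loss = m.score(VX)` above); otherwise warm-starting from
        # old_best_params raises a NameError.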
if info['best_loss'] > old_best_loss:
info['best_loss'] = old_best_loss
info['best_pars'] = old_best_params
    if info['best_loss'] == info['val_loss']:
        with open(TARGET_FILENAME + FILETYPE_EXTENSION, 'wb') as f:
            cPickle.dump(m.parameters.data, f, protocol=cPickle.HIGHEST_PROTOCOL)
train_result_params = m.parameters.data.copy()
m.parameters.data = info['best_pars']
m.score(VX)
f_z_init_sample = m.function(['inpt'], m.init_recog.sample(), numpy_result=True)
f_z_sample = m.function(['inpt'], m.hmc_sampler.output, numpy_result=True)
f_gen = m.function([m.gen.inpt], m.gen.sample(), numpy_result=True)
f_gen_rate = m.function([m.gen.inpt], m.gen.rate, numpy_result=True)
f_joint_nll = m.function(['inpt'], m.joint_nll, numpy_result=True)
curr_pos = T.matrix('current_position')
curr_vel = T.matrix('current_velocity')
norm_noise = T.matrix('normal_noise')
unif_noise = T.vector('uniform_noise')
new_sampled_vel = m.hmc_sampler.kin_energy.sample(norm_noise)
updated_vel = m.hmc_sampler.partial_vel_constant * curr_vel + m.hmc_sampler.partial_vel_complement * new_sampled_vel
performed_hmc_steps = m.hmc_sampler.perform_hmc_steps(curr_pos, curr_vel)
hmc_step = m.hmc_sampler.hmc_step(curr_pos, curr_vel, np.float32(0), norm_noise, unif_noise)
lf_step_results = m.hmc_sampler.simulate_dynamics(curr_pos, curr_vel, return_full_list=True)
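# Inferred from the usage below: lf_step_results[0] stacks the position after
# each leapfrog step, lf_step_results[1] the velocity half-steps plus the final
# full-step velocity (an assumption read off the slicing, not from breze docs).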
# Gradient of the position after leapfrog step `step_index` (0-based) w.r.t.
# the initial position, one compiled function per step.
def make_pos_grad_function(step_index):
    rows = [T.grad(lf_step_results[0][step_index, 0, d], curr_pos) for d in (0, 1)]
    return m.function(['inpt', curr_pos, curr_vel], T.concatenate(rows, axis=0),
                      numpy_result=True, on_unused_input='warn')
f_z1_by_z0 = make_pos_grad_function(0)
f_z2_by_z0 = make_pos_grad_function(1)
f_z3_by_z0 = make_pos_grad_function(2)
f_z4_by_z0 = make_pos_grad_function(3)
zT_by_z0_0 = T.grad(performed_hmc_steps[0][-1, 0, 0], curr_pos)
zT_by_z0_1 = T.grad(performed_hmc_steps[0][-1, 0, 1], curr_pos)
zT_by_z0 = T.concatenate([zT_by_z0_0, zT_by_z0_1], axis=0)
f_zT_by_z0 = m.function(['inpt', curr_pos, curr_vel], zT_by_z0, numpy_result=True, on_unused_input='warn')
f_pot_en = m.function(['inpt', curr_pos], m.hmc_sampler.eval_pot_energy(curr_pos), numpy_result=True)
f_kin_en = m.function(['inpt', curr_vel], m.kin_energy.nll(curr_vel).sum(-1), numpy_result=True)
f_perform_hmc_steps = m.function(['inpt', curr_pos, curr_vel],
T.concatenate([performed_hmc_steps[0], performed_hmc_steps[1]], axis=1))
f_hmc_step = m.function(['inpt', curr_pos, curr_vel, norm_noise, unif_noise],
T.concatenate([hmc_step[0], hmc_step[1]],axis=1), on_unused_input='warn')
f_kin_energy_sample_from_noise = m.function(['inpt', norm_noise], new_sampled_vel)
f_updated_vel_from_noise = m.function(['inpt', curr_vel, norm_noise], updated_vel)
f_perform_lf_steps = m.function(['inpt', curr_pos, curr_vel],
T.concatenate([lf_step_results[0], lf_step_results[1]], axis=0))
f_z_init_mean = m.function(['inpt'], m.init_recog.mean, numpy_result=True)
f_z_init_var = m.function(['inpt'], m.init_recog.var, numpy_result=True)
f_v_init_var = m.function(['inpt'], T.extra_ops.cpu_contiguous(m.kin_energy.var), numpy_result=True)
full_sample = m.hmc_sampler.sample_with_path()
f_full_sample = m.function(['inpt'], T.concatenate([full_sample[0], full_sample[1]], axis=1))
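# sample_with_path returns positions and velocities for every HMC step; the
# concatenation along axis 1 stacks them side by side, so the first half of
# that axis holds positions and the second half velocities (inferred from the
# slicing further below).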
pos = T.matrix('pos')
vel = T.matrix('vel')
updated_vel = T.matrix('updated_vel')  # rebinds the name; the symbolic update above was already compiled into f_updated_vel_from_noise
time_step = T.vector('time_step')
aux_inpt_replacements = {m.auxiliary_inv_model_inpt['position']: pos,
m.auxiliary_inv_model_inpt['time']: T.cast(time_step[0], dtype='float32')}
if 'velocity' in m.auxiliary_inv_model_inpt: # only use the updated velocity if it is part of the model
aux_inpt_replacements.update({m.auxiliary_inv_model_inpt['velocity']: updated_vel})
aux_inv_model_var = clone(m.auxiliary_inv_model.var, replace=aux_inpt_replacements)
aux_inv_model_mean = clone(m.auxiliary_inv_model.mean, replace=aux_inpt_replacements)
aux_inv_model_nll = clone(m.auxiliary_inv_model.nll(vel).sum(-1), replace=aux_inpt_replacements)
if 'velocity' not in m.auxiliary_inv_model_inpt:
f_aux_inv_var = m.function(['inpt', pos, time_step], aux_inv_model_var, numpy_result=True)
f_aux_inv_mean = m.function(['inpt', pos, time_step], aux_inv_model_mean, numpy_result=True)
f_aux_inv_nll = m.function(['inpt', pos, time_step, vel], aux_inv_model_nll, numpy_result=True)
else:
f_aux_inv_var = m.function(['inpt', pos, updated_vel, time_step], aux_inv_model_var, numpy_result=True)
f_aux_inv_mean = m.function(['inpt', pos, updated_vel, time_step], aux_inv_model_mean, numpy_result=True)
f_aux_inv_nll = m.function(['inpt', pos, updated_vel, time_step, vel], aux_inv_model_nll, numpy_result=True)
final_vel_inpt_replacements = {m.final_vel_model_inpt['position']: pos,
m.final_vel_model_inpt['time']: T.cast(m.n_hmc_steps, dtype='float32')}
final_vel_model_var = clone(m.final_vel_model.var, replace=final_vel_inpt_replacements)
final_vel_model_mean = clone(m.final_vel_model.mean, replace=final_vel_inpt_replacements)
final_vel_model_nll = clone(m.final_vel_model.nll(vel).sum(-1), replace=final_vel_inpt_replacements)
f_v_final_var = m.function(['inpt', pos], final_vel_model_var, numpy_result=True)
f_v_final_mean = m.function(['inpt', pos], final_vel_model_mean, numpy_result=True)
f_v_final_model_nll = m.function(['inpt', pos, vel], final_vel_model_nll, numpy_result=True)
f_kin_energy_nll = m.function(['inpt'], m.kin_energy.expected_nll, numpy_result=True)
f_init_recog_nll = m.function(['inpt'], m.init_recog.expected_nll.sum(-1), numpy_result=True)
pos = T.matrix('pos')  # fresh symbolic position; the functions above were compiled already
f_pot_en_deriv = m.function(['inpt', pos], m.hmc_sampler.eval_pot_energy_deriv(pos))
f_pot_en_2nd_deriv0 = m.function(['inpt', m.hmc_sampler.pot_energy_inpt], T.grad(m.hmc_sampler.pot_energy_deriv[0, 0], m.hmc_sampler.pot_energy_inpt))
f_pot_en_2nd_deriv1 = m.function(['inpt', m.hmc_sampler.pot_energy_inpt], T.grad(m.hmc_sampler.pot_energy_deriv[0, 1], m.hmc_sampler.pot_energy_inpt))
print f_init_recog_nll(VX).mean()
init_var = f_z_init_var(VX)
print init_var.mean()
print init_var.max()
print init_var.min()
print
print f_joint_nll(TX).mean()
fig, axs = plt.subplots(2, 3, figsize=(27, 18))
### Original data
O = (X_no_bin_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O, image_dims, (8, 8), (1, 1))
axs[0, 0].imshow(img, cmap=cm.binary)
O2 = (X_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O2, image_dims, (8, 8), (1, 1))
axs[1, 0].imshow(img, cmap=cm.binary)
### Reconstruction
#z_sample = f_z_sample((X[:64]))
z_init_sample = cast_array_to_local_type(f_z_init_sample((X[:64])))
z_sample = f_perform_hmc_steps((X[:64]),
z_init_sample,
f_kin_energy_sample_from_noise((X[:64]),
cast_array_to_local_type(np.random.normal(size=(64, m.n_latent)).astype('float32')))
)[-1, :64, :]
R = f_gen_rate(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R, image_dims, (8, 8), (1, 1))
axs[0, 1].imshow(img, cmap=cm.binary)
Rinit = f_gen_rate(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit, image_dims, (8, 8), (1, 1))
axs[0, 2].imshow(img, cmap=cm.binary)
R2 = f_gen(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R2, image_dims, (8, 8), (1, 1))
axs[1, 1].imshow(img, cmap=cm.binary)
Rinit2 = f_gen(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit2, image_dims, (8, 8), (1, 1))
axs[1, 2].imshow(img, cmap=cm.binary)
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
prior_sample = cast_array_to_local_type(np.random.randn(64, m.n_latent))
S = f_gen_rate(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
axs[0].imshow(img, cmap=cm.binary)
S2 = f_gen(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S2, image_dims, (8, 8), (1, 1))
axs[1].imshow(img, cmap=cm.binary)
#S3 = f_gen_rate(prior_sample)[:, :784].astype('float32')
#img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
#axs[2, 2].imshow(img, cmap=cm.nipy_spectral)
# TODO: Axis titles, plot title, make this work if one selects two dimensions out of more than two (i.e. if n_latent>2)
from scipy.stats import norm as normal_distribution
unit_interval_positions = np.linspace(0.025, 0.975, 20)
positions = normal_distribution.ppf(unit_interval_positions)
print unit_interval_positions
print positions
latent_array = np.zeros((400, 2))
latent_array[:, 1] = -np.repeat(positions, 20) # because images are filled top -> bottom, left -> right (row by row)
latent_array[:, 0] = np.tile(positions, 20)
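# Sanity check, assuming the 20x20 grid: the quantile positions should span
# roughly the central 95% of the standard-normal prior (about +/- 1.96).
assert latent_array.shape == (400, 2)
print latent_array[:, 0].min(), latent_array[:, 0].max()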
fig, axs = plt.subplots(1, 1, figsize=(24, 24))
F = f_gen_rate(cast_array_to_local_type(latent_array)).astype('float32')
img = tile_raster_images(F, image_dims, (20, 20), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs.imshow(img, cmap=cm.binary)
L = f_z_sample(X)
L_init = f_z_init_sample(X)
dim1 = 0
dim2 = 1
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(L[:, dim1], L[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)
axs[1].scatter(L_init[:, dim1], L_init[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)
cax = fig.add_axes([0.95, 0.2, 0.02, 0.6])
cax.scatter(np.repeat(0, 10), np.arange(10), c=np.arange(10), lw=0, s=300)
cax.set_xlim(-0.1, 0.1)
cax.set_ylim(-0.5, 9.5)
plt.yticks(np.arange(10))
plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')
cax.tick_params(axis='y', colors='white')
for tick in cax.yaxis.get_major_ticks():
tick.label.set_fontsize(14)
tick.label.set_color('black')
cax.spines['bottom'].set_color('white')
cax.spines['top'].set_color('white')
cax.spines['right'].set_color('white')
cax.spines['left'].set_color('white')
axs[0].set_title('After HMC steps')
axs[1].set_title('Initial recognition model')
axs[0].set_xlim(-3, 3)
axs[0].set_ylim(-3, 3)
axs[1].set_xlim(-3, 3)
axs[1].set_ylim(-3, 3)
fig, axs = plt.subplots(4, 5, figsize=(20, 16))
colors = cm.jet(np.linspace(0, 1, 10))
for i in range(5):
axs[0, i].scatter(L_init[Z[:].argmax(1) == i, dim1], L_init[Z[:].argmax(1) == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
axs[1, i].scatter(L[Z[:].argmax(1) == i, dim1], L[Z[:].argmax(1) == i, dim2], c=colors[i], lw=0, s=5, alpha=.2)
axs[0, i].set_title(str(i) + ' before HMC')
axs[1, i].set_title(str(i) + ' after HMC')
axs[2, i].scatter(L_init[Z[:].argmax(1) == (5+i), dim1], L_init[Z[:].argmax(1) == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
axs[3, i].scatter(L[Z[:].argmax(1) == (5+i), dim1], L[Z[:].argmax(1) == (5+i), dim2], c=colors[5+i], lw=0, s=5, alpha=.2)
axs[2, i].set_title(str(5+i) + ' before HMC')
axs[3, i].set_title(str(5+i) + ' after HMC')
for j in range(4):
axs[j, i].set_xlim(-3, 3)
axs[j, i].set_ylim(-3, 3)
X_index = 25 # index=0 -> 5, index=1 -> 0, index=2 -> 4, index=3 -> 1, index=24 -> underlined 1, index=39 -> ugly 6
num_repeats = 1000
fig, axs = plt.subplots(1, 2, figsize=(6, 3))
img = tile_raster_images(np.array([X[X_index, :]]), image_dims, (1, 1), (1, 1))
axs[0].imshow(img, cmap=cm.binary)
img = tile_raster_images(np.array([X_no_bin[X_index, :]]), image_dims, (1, 1), (1, 1))
axs[1].imshow(img, cmap=cm.binary)
repeated_X = cast_array_to_local_type(np.tile(np.array([X[X_index, :]]), (num_repeats, 1)).astype('float32'))
full_sample = f_full_sample(repeated_X).astype('float32')
z_samples = full_sample[:, :num_repeats, :]
v_samples = full_sample[:, num_repeats:, :]
z_sample_mean = z_samples[:, :, :].mean(axis=1)
z_sample_std = z_samples[:, :, :].std(axis=1)
single_X = cast_array_to_local_type(np.array([X[X_index, :]]).astype('float32'))
init_mean = f_z_init_mean(single_X)[0]
init_var = f_z_init_var(single_X)[0]
init_vel_var = f_v_init_var(single_X)[0]
print 'Posterior distribution statistics'
print
print 'Initial model: - Mean: ' + str(init_mean)
print ' - Var: ' + str(init_var)
print
print 'Full HVI model: - Mean: ' + str(z_sample_mean[m.n_hmc_steps])
print ' - Var: ' + str(z_sample_std[m.n_hmc_steps] ** 2)
print
print 'Velocity model variance: ' + str(init_vel_var)
mean_loss = m.score(repeated_X).mean()
print 'Mean loss for this X: ', mean_loss
single_z_evolution = z_samples[:, 0, :]
R = f_gen_rate(cast_array_to_local_type(single_z_evolution)).astype('float32')
num_steps = m.n_hmc_steps + 1
num_images = num_steps + 1
fig, axs = plt.subplots(1, num_images, figsize=(9*num_images, 9))
img = tile_raster_images(np.array([X[X_index, :]]), image_dims, (1, 1), (1, 1))
axs[0].imshow(img, cmap=cm.binary) # cmap=cm.nipy_spectral
for i in range(num_steps):
img = tile_raster_images(np.array([R[i]]), image_dims, (1, 1), (1, 1))
axs[1 + i].imshow(img, cmap=cm.binary)
pot_en_of_image = f_pot_en(single_X, cast_array_to_local_type(np.array([single_z_evolution[i]])))[0]
axs[1 + i].set_title('Pot. energy: ' + str(pot_en_of_image), fontsize=32)
dim1 = 0
dim2 = 1
resolution = 201
z_sample_final_mean = z_sample_mean[m.n_hmc_steps]
lower_dim1_limit = z_sample_final_mean[dim1] - 0.5
upper_dim1_limit = z_sample_final_mean[dim1] + 0.5
lower_dim2_limit = z_sample_final_mean[dim2] - 0.5
upper_dim2_limit = z_sample_final_mean[dim2] + 0.5
number_images_per_axis = 11
latent_array = np.zeros((number_images_per_axis**2, 2))
gap_between_images = (resolution - 1)//(number_images_per_axis - 1)
indices_for_images = np.arange(0, resolution, gap_between_images)
pot_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
for j in range(resolution):
#pos_array = f_z_init_mean(single_X)
pos_array = np.array([z_sample_final_mean])
pos_array[0, dim1] = x[i]
pos_array[0, dim2] = y[j]
pos_array_of_local_type = cast_array_to_local_type(pos_array)
pot_energy_matrix[j, i] = f_pot_en(single_X, pos_array_of_local_type)[0]
if i in indices_for_images and j in indices_for_images:
            latent_array[(i // gap_between_images)
                         + (number_images_per_axis - 1 - j // gap_between_images)
                         * number_images_per_axis, :] = pos_array[0, :]
print 'Minimum potential energy (at grid points): ' + str(pot_energy_matrix.min())
print 'Maximum potential energy (at grid points): ' + str(pot_energy_matrix.max())
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
CS = axs[0].contour(x, y, pot_energy_matrix, 40)
plt.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
axs[0].set_title('Potential energy surface')
F = f_gen_rate(cast_array_to_local_type(latent_array))
img = tile_raster_images(F, image_dims, (number_images_per_axis, number_images_per_axis), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs[1].imshow(img, cmap=cm.binary)
plt.show()
resolution = 200
underlying_variance = f_v_init_var(single_X)
velocity_range_for_images = 10.0 * np.sqrt(underlying_variance[0, :])
lower_dim1_limit = np.around(- velocity_range_for_images[dim1])
upper_dim1_limit = np.around( velocity_range_for_images[dim1])
lower_dim2_limit = np.around(- velocity_range_for_images[dim2])
upper_dim2_limit = np.around( velocity_range_for_images[dim2])
kin_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
kin_x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
kin_y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
for j in range(resolution):
vel_array = np.zeros((1, m.n_latent)).astype('float32')
vel_array[0, dim1] = kin_x[i]
vel_array[0, dim2] = kin_y[j]
vel_array_of_local_type = cast_array_to_local_type(vel_array)
kin_energy_matrix[j, i] = f_kin_en(single_X, vel_array_of_local_type)
print 'Minimum kinetic energy (at grid points): ' + str(kin_energy_matrix.min())
print 'Maximum kinetic energy (at grid points): ' + str(kin_energy_matrix.max())
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
CS = ax.contour(kin_x, kin_y, kin_energy_matrix)
ax.set_aspect('equal', 'datalim')  # plt.axes() would create a new blank axes on top of the figure
plt.clabel(CS, inline=1, fmt='%1.1f', fontsize=10)
ax.set_title('Kinetic energy surface')
plt.show()
fig, axs = plt.subplots(m.n_hmc_steps + 1, 3, figsize=(18, (m.n_hmc_steps + 1) * 6))
colors = cm.jet(np.linspace(0, 1, 10))
#contour_levels = (198, 200, 202, 204, 206, 208, 210)
#contour_levels = (120, 130, 140, 150, 160, 180, 200, 240, 280)
#contour_levels = (100, 102, 104, 106, 108, 110, 115, 120, 125, 130)
#contour_levels = (400, 402, 404, 406, 408, 410, 412, 416, 420)
#contour_levels = (106, 108, 110, 112, 114, 116, 118, 120, 124, 128)
contour_levels = (160, 165, 170, 175, 180, 185, 190, 195, 200, 210, 220, 230, 240, 250, 270, 300)
#contour_levels = (174, 175, 176, 177, 178, 180, 182, 184, 186, 190, 200)
#contour_levels = (59, 61, 63, 65, 67, 69, 71, 73, 75, 80, 85, 90)
vel_contour_levels = np.linspace(2.0, 70.0, 18)
#CS0 = axs[0, 0].contourf(x, y, pot_energy_matrix, np.linspace(155, 240, 500))
def colour_for_z_samples(samples):
mean = samples.mean(axis=0)
mean1 = mean[dim1]
mean2 = mean[dim2]
colour = np.zeros_like(samples[:, 0])
colour[np.logical_and(samples[:, dim1] < mean1, samples[:, dim2] < mean2)] = 0
colour[np.logical_and(samples[:, dim1] < mean1, samples[:, dim2] >= mean2)] = 2
colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] < mean2)] = 4
colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] >= mean2)] = 7
colour[((samples[:, dim1] - mean1) ** 2 + (samples[:, dim2] - mean2) ** 2) < 1e-5] = 9
return colour.astype('int32')
for i in range(m.n_hmc_steps + 1):
CS = axs[i, 0].contour(x, y, pot_energy_matrix, contour_levels)
plt.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
axs[i, 0].scatter(z_samples[i,:,dim1], z_samples[i,:,dim2], c=colors[colour_for_z_samples(z_samples[i,:,:])], s=20, alpha=.3, lw=0)
CS_vel = axs[i, 1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
plt.clabel(CS_vel, inline=1, fmt='%1.1f', fontsize=10)
axs[i, 1].scatter(v_samples[i,:,dim1], v_samples[i,:,dim2], c=colors[colour_for_z_samples(z_samples[i,:,:])], s=20, alpha=.3, lw=0)
pot_energy_distrib = f_pot_en(repeated_X, cast_array_to_local_type(z_samples[i, :, :]))
if i == 0:
max_x_value_for_hist = pot_energy_distrib.max() + 5
min_x_value_for_hist = np.floor(pot_energy_matrix.min()) -5
pot_energy_distrib_mean = pot_energy_distrib.mean()
axs[i, 2].hist(pot_energy_distrib, 30, normed=1, range=(min_x_value_for_hist, max_x_value_for_hist))
axs[i, 2].autoscale(enable=False, axis='both')
axs[i, 2].axvline(pot_energy_distrib_mean, color='r', linestyle='dashed', linewidth=2)
axs[i, 2].set_xlim(min_x_value_for_hist, max_x_value_for_hist)
axs[i, 2].text(pot_energy_distrib_mean + 1.0, 0.8*axs[i, 2].get_ylim()[1], 'Mean: ' + str(pot_energy_distrib_mean))
axs[i, 1].set_xlim(-velocity_range_for_images[dim1], velocity_range_for_images[dim1])
axs[i, 1].set_ylim(-velocity_range_for_images[dim2], velocity_range_for_images[dim2])
axs[i, 1].set_aspect('equal', 'datalim')
axs[i, 0].set_aspect('equal', 'datalim')
axs[0, 0].scatter(f_z_init_mean(single_X)[0, dim1], f_z_init_mean(single_X)[0, dim2], c='black', s=20)
plt.show()
# NOTE: position_array is computed in the leapfrog-trajectory cell further
# below; run that cell first when evaluating this one.
z_pos = cast_array_to_local_type(np.array([position_array[3]]))  # [z_sample_mean[m.n_hmc_steps]]
second_deriv0 = f_pot_en_2nd_deriv0(single_X, z_pos)
second_deriv1 = f_pot_en_2nd_deriv1(single_X, z_pos)
second_deriv = np.concatenate([second_deriv0, second_deriv1], axis=0)
det = np.linalg.det(second_deriv)
trace = np.trace(second_deriv)
if trace >= 0 and det >= 0:
eigvalstring = '+ +'
elif det < 0:
eigvalstring = '+ -'
else: # so trace < 0 and det >= 0
eigvalstring = '- -'
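# Why this works for a 2x2 Hessian: det = l1*l2 and trace = l1 + l2 for
# eigenvalues l1, l2, so det < 0 implies mixed signs, while for det >= 0 both
# eigenvalues share the sign of the trace. E.g. [[2, 0], [0, -3]] has
# det = -6 < 0, hence '+ -' (a saddle point).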
print 'Potential energy:'
print f_pot_en(single_X, z_pos)
print
print '1st derivative:'
print f_pot_en_deriv(single_X, z_pos)
print
print '2nd derivative:'
print second_deriv
print
print '2nd deriv. eigenvalue signs: ', eigvalstring
#debugprint(f_z1_by_z0.theano_func.theano_func)
zposss = cast_array_to_local_type(np.array([position_array[0]]))
vposss = cast_array_to_local_type(np.array([[0.0, 0.0]]).astype('float32'))
print f_z1_by_z0(single_X, zposss, vposss)
print f_z2_by_z0(single_X, zposss, vposss)
print f_z3_by_z0(single_X, zposss, vposss)
print f_z4_by_z0(single_X, zposss, vposss)
print
print
print f_zT_by_z0(single_X, zposss, vposss)
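# The following Hessian entries were hand-copied from earlier runs of
# f_pot_en_2nd_deriv0/1 and rescaled by what appear to be the inverse
# kinetic-energy variances; they are run-specific and will not match a fresh run.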
deriv0 = 0.5*(1/ np.array([[0.79016948, 0.79016948], [0.57629448, 0.57629448]])) * np.array([[1416.46179199, -101.45363617], [-101.4536972, 915.88116455]])
deriv1 = (1/ np.array([[0.79016948, 0.79016948], [0.57629448, 0.57629448]])) * np.array([[1430.79699707, -107.75597382], [-107.75673676, 919.03118896]])
deriv2 = (1/ np.array([[0.79016948, 0.79016948], [0.57629448, 0.57629448]])) * np.array([[1466.14794922, -125.28180695], [-125.28205109, 927.0491333]])
deriv3 = (1/ np.array([[0.79016948, 0.79016948], [0.57629448, 0.57629448]])) * np.array([[1500.26269531, -147.59794617], [ -147.59846497, 935.75341797]])
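# The prints below check the leapfrog Jacobian dz_K/dz_0 against a truncated
# series expansion: for K leapfrog steps with step size eps,
#   dz_K/dz_0 = I - eps**2 * sum_{i<K} (K - i) * H_i + O(eps**4) cross terms,
# with H_i the (scaled) potential-energy Hessian after step i. This is a
# hand-derived check against the f_z*_by_z0 functions above, not model code.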
print np.array([[1.0, 0.0], [0.0, 1.0]]) \
- (0.01044571 ** 2) * \
(
1.0*deriv0 \
) \
+ (0.01044571 ** 4) * \
(
0.0
)
print np.array([[1.0, 0.0], [0.0, 1.0]]) \
- (0.01044 ** 2) * \
(
2.0*deriv0 +\
1.0*deriv1
) \
+ (0.01044 ** 4) * \
(
1.0*np.dot(deriv0, deriv1)
)
print np.array([[1.0, 0.0], [0.0, 1.0]]) \
- (0.01044 ** 2) * \
(
3.0*deriv0 +\
2.0*deriv1 +\
1.0*deriv2
) \
+ (0.01044 ** 4) * \
(
2.0*np.dot(deriv0, deriv1) +\
2.0*np.dot(deriv0, deriv2) +\
1.0*np.dot(deriv1, deriv2)
) \
- (0.01044 ** 6) * \
(
1.0*np.dot(deriv0, np.dot(deriv1, deriv2))
)
print np.array([[1.0, 0.0], [0.0, 1.0]]) \
- (0.01044 ** 2) * \
(
4.0*deriv0 +\
3.0*deriv1 +\
2.0*deriv2 +\
1.0*deriv3
) \
+ (0.01044 ** 4) * \
(
3.0*np.dot(deriv0, deriv1) +\
3.0*np.dot(deriv0, deriv2) +\
2.0*np.dot(deriv1, deriv2) +\
3.0*np.dot(deriv0, deriv3) +\
2.0*np.dot(deriv1, deriv3) +\
1.0*np.dot(deriv2, deriv3)
) # order 6 and order 8 terms
np.random.seed(1)
velocity_noise = cast_array_to_local_type(np.random.normal(size=(m.n_hmc_steps, 1, m.n_latent)))
velocity_noise = cast_array_to_local_type(np.zeros_like(velocity_noise))  # zero the noise -> deterministic velocity (re)sampling for this plot
init_pos = f_z_init_mean(single_X) # + np.array([0.0, 0.1])
init_vel = f_kin_energy_sample_from_noise(single_X, velocity_noise[0])
num_vels_per_hmc = (m.n_lf_steps + 2)
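# Per HMC step we store the initial velocity, the half-step velocities of the
# leapfrog integration and the final full-step velocity: n_lf_steps + 2 entries.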
position_array = np.zeros((m.n_hmc_steps * m.n_lf_steps + 1, m.n_latent))
position_array[0] = init_pos
velocity_array = np.zeros((m.n_hmc_steps * num_vels_per_hmc, m.n_latent))
velocity_array[0] = ma.assert_numpy(init_vel)
for hmc_num in range(m.n_hmc_steps):
if hmc_num == 0:
curr_pos = cast_array_to_local_type(init_pos)
curr_vel = init_vel
else:
curr_vel = f_updated_vel_from_noise(single_X, curr_vel, velocity_noise[hmc_num])
velocity_array[hmc_num * (m.n_lf_steps + 2)] = ma.assert_numpy(curr_vel)
lf_step_results = f_perform_lf_steps(single_X, curr_pos, curr_vel)
pos_steps = lf_step_results[:m.n_lf_steps]
vel_half_steps_and_final = lf_step_results[m.n_lf_steps:]
final_vel = lf_step_results[-1]
final_pos = pos_steps[-1]
position_array[hmc_num * m.n_lf_steps + 1: (hmc_num + 1)*m.n_lf_steps + 1] = ma.assert_numpy(pos_steps[:, 0, :])
velocity_array[hmc_num * num_vels_per_hmc + 1: (hmc_num + 1) * num_vels_per_hmc] = ma.assert_numpy(vel_half_steps_and_final[:, 0, :])
curr_pos = final_pos
curr_vel = final_vel
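# Hedged diagnostic (assumes f_pot_en / f_kin_en accept a single row, as used
# above): leapfrog approximately conserves the Hamiltonian, so potential plus
# kinetic energy should change only slightly within each HMC step.
for hmc_num in range(m.n_hmc_steps):
    p0 = position_array[hmc_num * m.n_lf_steps].astype('float32')
    v0 = velocity_array[hmc_num * num_vels_per_hmc].astype('float32')
    pK = position_array[(hmc_num + 1) * m.n_lf_steps].astype('float32')
    vK = velocity_array[(hmc_num + 1) * num_vels_per_hmc - 1].astype('float32')
    e0 = (float(f_pot_en(single_X, cast_array_to_local_type(np.array([p0])))[0])
          + float(f_kin_en(single_X, cast_array_to_local_type(np.array([v0])))))
    eK = (float(f_pot_en(single_X, cast_array_to_local_type(np.array([pK])))[0])
          + float(f_kin_en(single_X, cast_array_to_local_type(np.array([vK])))))
    print 'HMC step', hmc_num, '- energy before:', e0, 'after:', eK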
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
step_color = cm.jet(np.linspace(0, 1, position_array.shape[0]))
CS = axs[0].contour(x, y, pot_energy_matrix, 40)
CS_vel = axs[1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
hmc_step_indices = np.arange(0, position_array.shape[0], m.n_lf_steps)
size_array = 40*np.ones((position_array.shape[0],))
size_array[hmc_step_indices] = 100
axs[0].scatter(position_array[:, dim1], position_array[:, dim2], c=step_color, lw=1, s=size_array)
axs[1].set_color_cycle(step_color)
for hmc_num in range(m.n_hmc_steps):
curr_vel_range = np.arange(num_vels_per_hmc * hmc_num, num_vels_per_hmc * (hmc_num + 1) - 2)
init_vel_ind = hmc_num * num_vels_per_hmc
final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
curr_index = hmc_step_indices[hmc_num]
next_index = hmc_step_indices[hmc_num + 1]
for j in curr_vel_range:
axs[1].plot(velocity_array[j:j+2, dim1], velocity_array[j:j+2, dim2], lw=2)
axs[1].scatter(velocity_array[init_vel_ind, dim1], velocity_array[init_vel_ind, dim2], c=step_color[curr_index], lw=0, s=100)
axs[1].scatter(velocity_array[final_vel_ind, dim1], velocity_array[final_vel_ind, dim2], c=step_color[next_index], lw=0, s=100)
for hmc_num in range(m.n_hmc_steps):
final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
next_index = hmc_step_indices[hmc_num + 1]
axs[1].plot(velocity_array[final_vel_ind-1:final_vel_ind+1, dim1], velocity_array[final_vel_ind-1:final_vel_ind+1, dim2], lw=2, c=step_color[next_index])
axs[0].set_aspect('equal', 'datalim')
axs[1].set_aspect('equal', 'datalim')
inv_model_mean_output = np.zeros((m.n_hmc_steps, m.n_latent, num_repeats, m.n_latent))
inv_model_var_output = np.zeros((m.n_hmc_steps, m.n_latent, num_repeats, m.n_latent))
for i in range(m.n_hmc_steps):
variation_start = z_sample_mean[i] - 2*z_sample_std[i]
variation_end = z_sample_mean[i] + 2*z_sample_std[i]
for variation_dim in range(m.n_latent):
z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
sample_array = np.tile(z_sample_mean[i], (num_repeats, 1))
sample_array[:, variation_dim] = z_variation
# TODO: Implement this for partial_velocity_update=True
inv_model_mean_output[i, variation_dim] = f_aux_inv_mean(repeated_X, cast_array_to_local_type(sample_array), cast_array_to_local_type(np.array([i]).astype('float32')))
inv_model_var_output[i, variation_dim] = f_aux_inv_var(repeated_X, cast_array_to_local_type(sample_array), cast_array_to_local_type(np.array([i]).astype('float32')))
variation_start = z_sample_mean[m.n_hmc_steps] - 2*z_sample_std[m.n_hmc_steps]
variation_end = z_sample_mean[m.n_hmc_steps] + 2*z_sample_std[m.n_hmc_steps]
print variation_start
print variation_end
final_vel_model_mean_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
final_vel_model_var_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
for variation_dim in range(m.n_latent):
z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
sample_array = np.tile(z_sample_final_mean, (num_repeats, 1))
sample_array[:, variation_dim] = z_variation
final_vel_model_mean_output[variation_dim] = f_v_final_mean(repeated_X, cast_array_to_local_type(sample_array))
final_vel_model_var_output[variation_dim] = f_v_final_var(repeated_X, cast_array_to_local_type(sample_array))
fig, axs = plt.subplots(m.n_hmc_steps, 2, figsize=(18, 9*m.n_hmc_steps))
for i in range(m.n_hmc_steps - 1):
    axs[i, 0].scatter(inv_model_mean_output[i+1, :, :, dim1],
                      inv_model_mean_output[i+1, :, :, dim2],
                      c=np.transpose(np.tile(np.linspace(0, m.n_latent - 1, m.n_latent), (num_repeats, 1))),
                      lw=0, s=5)
    axs[i, 1].scatter(inv_model_var_output[i+1, :, :, dim1],
                      inv_model_var_output[i+1, :, :, dim2],
                      c=np.transpose(np.tile(np.linspace(0, m.n_latent - 1, m.n_latent), (num_repeats, 1))),
                      lw=0, s=5)
    axs[i, 0].set_title('Aux. inv. model ' + str(i+1) + ': Mean')
    axs[i, 1].set_title('Aux. inv. model ' + str(i+1) + ': Variance')
if m.n_hmc_steps > 1:
axis_final0 = axs[m.n_hmc_steps-1, 0]
axis_final1 = axs[m.n_hmc_steps-1, 1]
else:
axis_final0 = axs[0]
axis_final1 = axs[1]
axis_final0.scatter(final_vel_model_mean_output[:, :, dim1],
final_vel_model_mean_output[:, :, dim2],
c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))),
lw=0, s=5)
axis_final1.scatter(final_vel_model_var_output[:, :, dim1],
final_vel_model_var_output[:, :, dim2],
c=np.transpose(np.tile(np.linspace(0,m.n_latent-1,m.n_latent), (num_repeats, 1))),
lw=0, s=5)
axis_final0.set_title('Final vel. model: Mean')
axis_final1.set_title('Final vel. model: Variance')
plt.show()
final_z_samples = cast_array_to_local_type(z_samples[m.n_hmc_steps, :, :])
final_v_samples = cast_array_to_local_type(v_samples[m.n_hmc_steps, :, :])
final_vel_mean = f_v_final_mean(repeated_X, final_z_samples)
final_vel_var = f_v_final_var(repeated_X, final_z_samples)
final_vel_nll = f_v_final_model_nll(repeated_X, final_z_samples, final_v_samples)
print f_kin_energy_nll(single_X).sum(-1)
print final_vel_nll.mean()
print final_vel_nll.min()
print final_vel_nll.max()
fig, axs = plt.subplots(4, 2, figsize=(18, 36))  # placeholder grid for the analysis sketched in the TODO below
# TODO: Analyse how final_vel_mean and final_vel_var depend on z (they all share the same x).
final_step = m.n_hmc_steps  # sample index after the last HMC step
print z_samples[final_step, :, :].mean(axis=0)
print z_samples[final_step, :, :].var(axis=0)
print v_samples[final_step, :, :].mean(axis=0)
print v_samples[final_step, :, :].var(axis=0)
print f_v_init_var(single_X)
print final_vel_nll.mean()
plt.boxplot(final_vel_nll, whis=1)
plt.show()
centers = np.zeros((10, n_latents))
stddevs = np.zeros((10, n_latents))
centers_init = np.zeros((10, n_latents))
stddevs_init = np.zeros((10, n_latents))
for i in range(10):
Li = f_z_sample(X[Z.argmax(1) == i])
centers[i] = Li.mean(axis=0)
stddevs[i] = np.std(Li, axis=0)
Li_init = f_z_init_sample(X[Z.argmax(1) == i])
centers_init[i] = Li_init.mean(axis=0)
stddevs_init[i] = np.std(Li_init, axis=0)
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[0].scatter(centers_init[:, dim1], centers_init[:, dim2], c=range(10), s=50, marker=u's')
axs[1].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[1].scatter(centers[:, dim1] + stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'>')
axs[1].scatter(centers[:, dim1] - stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'<')
axs[1].scatter(centers[:, dim1], centers[:, dim2] + stddevs[:, dim2], c=range(10), s=50, marker=u'^')
axs[1].scatter(centers[:, dim1], centers[:, dim2] - stddevs[:, dim2], c=range(10), s=50, marker=u'v')
#axs[0].set_xlim(-1.2, 1.2)
#axs[0].set_ylim(-1.2, 1.2)
#axs[1].set_xlim(-1.2, 1.2)
#axs[1].set_ylim(-1.2, 1.2)
print (centers[:, dim1] - centers_init[:, dim1])
print (centers[:, dim2] - centers_init[:, dim2])
print (stddevs[:, dim1] - stddevs_init[:, dim1])
print (stddevs[:, dim2] - stddevs_init[:, dim2])